#!/bin/bash
#Copy raw read counts for each sample into one directory and remove info that isn't read counts

#Copy raw read counts for each sample into one directory.
#NOTE: this section expects to be run from
#/scratch/cmp1g20/RMS_resistance_data/ZOES_DATA - sampleInfo is read from, and
#All_Raw_Reads is created in, the current directory, while the copy destination
#below is that same directory written as an absolute path.
raw_data_dir=/scratch/cmp1g20/RMS_resistance_data/ZOES_DATA/RAW_DATA
all_reads_dir=/scratch/cmp1g20/RMS_resistance_data/ZOES_DATA/All_Raw_Reads

#make new directory containing all raw read counts
#(-p so that re-running the script does not fail when it already exists)
mkdir -p All_Raw_Reads

#stop early if the copy destination is missing (e.g. script started from the
#wrong directory) rather than letting every copy fail one by one
if [[ ! -d "$all_reads_dir" ]]; then
  printf 'error: %s does not exist; run this script from its parent directory\n' "$all_reads_dir" >&2
  exit 1
fi

#loop through the samples listed in sampleInfo
#columns 1-4 are not needed here; column 5 (plus anything after it) is the sample ID
#-r keeps backslashes literal; the || test still processes a last line that has
#no trailing newline
while read -r _ _ _ _ sampleID || [[ -n "$sampleID" ]]; do
  #skip blank lines and rows that have no sample ID column
  if [[ -z "$sampleID" ]]; then
    continue
  fi

  counts_file="${sampleID}_ReadsPerGene.out.tab"

  #copy straight from the sample directory using full paths, so a missing
  #sample directory cannot leave the loop working in the wrong place
  if ! cp -- "$raw_data_dir/$sampleID/$counts_file" "$all_reads_dir/$counts_file"; then
    printf 'warning: could not copy raw read counts for sample %s\n' "$sampleID" >&2
  fi
done < sampleInfo



#Reduce each copied ReadsPerGene file to a two-column table of raw read counts.

#change into directory containing all raw read counts
#abort if that fails: everything below writes and deletes files by relative name
if ! cd ./All_Raw_Reads; then
  printf 'error: cannot change into ./All_Raw_Reads\n' >&2
  exit 1
fi

#columns 1-4 of sampleInfo are not needed here; column 5 (plus anything after it)
#is the sample ID
#-r keeps backslashes literal; the || test still processes a last line that has
#no trailing newline
while read -r _ _ _ _ sampleID || [[ -n "$sampleID" ]]; do
  #skip blank lines and rows that have no sample ID column
  if [[ -z "$sampleID" ]]; then
    continue
  fi

  reads_per_gene="${sampleID}_ReadsPerGene.out.tab"
  counts_only="${sampleID}_RawReadCountsOnly.out"

  if [[ ! -f "$reads_per_gene" ]]; then
    printf 'warning: %s not found, skipping sample %s\n' "$reads_per_gene" "$sampleID" >&2
    continue
  fi

  #build the output in one pass, without an intermediate file:
  # - header line "GeneID<TAB>sampleID" (written even if no count rows remain)
  # - keep 1st and 2nd column from the file and drop the 3rd and 4th columns
  # - remove rows that aren't read counts (lines starting with N_)
  if {
    printf 'GeneID\t%s\n' "$sampleID"
    cut -f1,2 -- "$reads_per_gene" | sed '/^N_/d'
  } > "$counts_only"; then
    #remove the copied input only once the trimmed table has been written
    rm -- "$reads_per_gene"
  else
    printf 'warning: failed to write %s; keeping %s\n' "$counts_only" "$reads_per_gene" >&2
  fi
done < ../sampleInfo






